exp <- str_extract(getwd(), 'exp\\d[a-z]?')
d <- read.csv(paste0(here::here(), '/experiments/analysis/', exp, '/data/', exp, '_main_raw.csv'))
N <- length(unique(d$subject))
head(d)

Initial sample size is 41.

getwd()
## [1] "/home/dave/Dropbox (Lehigh University)/post_doc/professional/practical_new_world/gaita/experiments/analysis/exp3/scripts/preprocessing"

Check runtimes

(mostly for adjusting after pilot)

d %>% 
  group_by(subject) %>% 
  summarize(runtime = max(phase_runtime_mins)) %>% 
  ggplot(aes(x = reorder(subject, runtime), y = runtime)) + 
  geom_bar(stat = 'identity') +
  geom_hline(yintercept = 30, linetype = 'dashed') +
  labs(
    x = 'Subject',
    y = 'Main Phase Runtime (mins)'
  ) + 
  theme_bw() +
  theme(axis.text.x = element_blank())

Going to take everyone under an hour and average as the completion time.

runtimes <- d %>% 
  group_by(subject) %>% 
  summarize(runtime = max(phase_runtime_mins)) 
avg_runtime <- mean(runtimes[runtimes$runtime < 60,]$runtime)


desired_trials <- function(avg_runtime, n_trials = 308) {
  mins_per_trial <- avg_runtime / n_trials
  return(30 / mins_per_trial)
}

print(paste0('Target trials to be 30 min: ', round(desired_trials(avg_runtime))))
## [1] "Target trials to be 30 min: 305"

Accuracy & Response Time

For visual search

## rt distribution
d %>% 
  #filter(subject %in% sample(unique(d$subject), 10)) %>% 
  ggplot(aes(x = search_rt)) + 
  geom_histogram() + 
  facet_wrap(~subject, scales = 'free') + 
  labs(
    caption = 'A randomly-sampled ten participants'
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## calc proportion dropped
proportion_dropped <- function(rows_before, rows_now) {
  return((rows_before - rows_now) / rows_before)
}

## gross rt threshold
rows_before <- nrow(d)
d <- d[d$search_rt < 20000,]
trim_summary <- data.frame(reason = 'Search RTs above 20s', proportion_trimmed = proportion_dropped(rows_before, nrow(d)))
rows_before <- nrow(d)
d <- d[d$choice_rt < 20000,]
trim_summary <- rbind(trim_summary, data.frame(reason = 'Choice RTs above 20s', proportion_trimmed = proportion_dropped(rows_before, nrow(d))))

d %>% 
  group_by(subject) %>% 
  summarize(accuracy = 1 - mean(error), rt = mean(search_rt)) %>% 
  ggplot(aes(x = rt/1000, y = accuracy, group = subject)) + 
  geom_hline(yintercept = .75, linetype = 'dotted') + 
  geom_vline(xintercept = .3, linetype = 'dotted') + 
  geom_point() + 
  labs(
    x = ' Average Response Time (s)',
    y = 'Average Accuracy'
  ) + 
  theme_bw()

Location bias

d$selected_deck_location_num <- ifelse(d$selected_deck_location == 'right', 1, -1)
d %>% 
  group_by(subject) %>% 
  summarize(sdln = mean(selected_deck_location_num)) %>% 
  ggplot(aes(x = subject, y = abs(sdln))) + 
  geom_hline(yintercept = .9, linetype = 'dotted') + 
  geom_point() + 
  coord_flip() + 
  ylim(0,1) +
  labs(
    x = 'Subject',
    y = 'Location Bias'
  ) + 
  theme_bw() + 
  theme(axis.text.y = element_blank(),
        axis.ticks = element_blank())

Data Trimming

## cut subjects under 75% accuracy
bad_subjects <- d %>% 
  group_by(subject) %>% 
  summarize(error = mean(error), rt = mean(search_rt), location_bias = abs(mean(d$selected_deck_location_num))) %>% 
  filter(error > .25 | rt < .03 | location_bias > .9) %>% 
  select(subject)
n_bad_subjects <- nrow(bad_subjects)
d <- d[!(d$subject %in% bad_subjects$subject),]

There were 6 subjects dropped for having less than 75% accuracy.

## remove choice rts beyond 
hist(d$choice_rt)

Average choice RT across Ps

d %>% 
  filter(choice_rt < 2500) %>% 
  ggplot(aes(x = choice_rt, group = subject)) + 
  geom_density(alpha = .3, fill = 'steelblue') + 
  labs(
    x = 'Choice RT (ms)',
    caption = 'Choices < 2,500 ms only.'
  ) + 
  theme_bw()

Really not crazy about these subjects having sharp peaks with \(<500~ms\).

rows_before <- nrow(d)
d <- d[d$choice_rt > 300 & d$choice_rt < 10000,]
print(paste0('Proportion dropped: ', round(proportion_dropped(rows_before, nrow(d)), 3), ', ', rows_before - nrow(d), '/', nrow(d)))
## [1] "Proportion dropped: 0.103, 1110/9634"

Trimming choice trials by participant-wise choice rt sds

rows_before <- nrow(d)
d <- d %>% 
  group_by(subject) %>% 
  summarize(choice_rt_m = mean(choice_rt), choice_rt_sd = sd(choice_rt)) %>% 
  inner_join(d) %>% 
  filter(choice_rt >= choice_rt_m - (2*choice_rt_sd) & choice_rt <= choice_rt_m + (2*choice_rt_sd))
## Joining, by = "subject"
print(paste0('Proportion dropped: ', round(proportion_dropped(rows_before, nrow(d)), 3), ', ', rows_before - nrow(d), '/', nrow(d)))
## [1] "Proportion dropped: 0.043, 414/9220"

Compute variables

d$ssd <- 1 - d$selectedRiskyDeck
write.csv(d, paste0(here::here(), '/experiments/analysis/', exp, '/data/', exp, '_main.csv'), row.names = FALSE)
 

Analysis Homepage

A work by Dave Braun

dab414@lehigh.edu